
#  Primary analysis script for
#  Bailard, Graham, Gross, Porter, & Tromble,
#  "Combatting Hateful Attitudes and Browsing Behavior: The Case of Antisemitism"
#  Forthcoming in the Journal of Experimental Political Science

#  This script carries out all analysis from the main text and appendix
#  except for Appendix 3.9, which depends on the visit-level browsing
#  data. The code that generated Appendix 3.9 appears in clean_web.R.

#  This script is organized as a nested list. In RStudio, hit Alt+O to 
#  collapse all section headers. In other software, note that all lines 
#  with a section header start with a comment, end with a series of 
#  hyphens, and are indented hierarchically.

#  For information about the original computing environment and package
#  versions, see readme.txt.

############################################ ---------
#  SET UP ENVIRONMENT
#    Packages and helper functions -----------------------

rm(list=ls())

library(logr)
log_open()

#  Change this to match your file system
setwd("C:/Users/tuq69844/Dropbox/anti hate treatments")

#  Packages
library(tidyverse)
library(coefplot)
library(lubridate)
library(haven)
library(readxl)
library(estimatr)
library(glmnet)
library(texreg)
library(xtable)

theme_allPlots = function(textsize=8, axisTextReduce=1, gridColor = "gray80"){
  theme_bw() +
    theme(
      strip.background = element_blank(),
      strip.placement = "outside",
      strip.text = element_text(margin = margin(0,0,3,0), size = textsize, color = "gray5"),
      plot.margin = margin(1,1,1,1),
      panel.grid = element_line(size = .25, color = gridColor),
      axis.ticks = element_line(size = .25, color = gridColor),
      axis.title = element_text(size = textsize, color = "gray5"),
      axis.title.x = element_text(margin = margin(6,0,0,0), size = textSize),
      axis.title.y = element_text(margin = margin(0,6,0,0), size = textSize),
      axis.text = element_text(size = textsize - axisTextReduce, color = "gray5"),
      legend.text = element_text(size = textsize, color = "gray5"),
      legend.title = element_text(size = textsize, color = "gray5"),
      panel.grid.major = element_line(size=.25, color=gridColor),
      panel.grid.minor = element_line(size=.25, color=gridColor),
    )
}

gsub_factor = function(string, replace, x) factor(gsub(string, replace, x), unique(gsub(string, replace, levels(x))))

format_num <- function(x, digits = 3) {
  x <- as.numeric(x)
  return(paste0(sprintf(paste0("%.", digits, "f"), x)))
}

#    Load data -----------------------

textSize = 8

#  Load survey data
d_covar  = read_rds( "replication_file/data_survey_completes_clean.rds")
d_break  = read_rds( "replication_file/data_survey_breakoffs_clean.rds")
d_dv     = read_rds( "replication_file/data_survey_DVs_clean.rds")
d_open   = read_csv( "replication_file/data_survey_openends.csv")
codebook = read_xlsx("replication_file/codebook.xlsx")

#  Load web data
d_web_daily   = read_csv("replication_file/data_web_daily_clean.csv")
d_web_weekly  = read_csv("replication_file/data_web_weekly_clean.csv")
d_web_prepost = read_csv("replication_file/data_web_prepost_clean.csv")

#  Add covariates to web data
d_web_daily   = left_join(d_web_daily, d_covar)
d_web_weekly  = left_join(d_web_weekly, d_covar)
d_web_prepost = left_join(d_web_prepost, d_covar)

#  Add treatment assignments to web data
d_web_daily   = left_join(d_web_daily, d_dv %>% select(caseid, z) %>% distinct)
d_web_weekly  = left_join(d_web_weekly, d_dv %>% select(caseid, z) %>% distinct)
d_web_prepost = left_join(d_web_prepost, d_dv %>% select(caseid, z) %>% distinct)

#  Add pre-treatment browsing info to survey data
d_covar = left_join(d_covar, d_web_prepost %>% select(caseid, contains("_pre")) %>% distinct)
d_break = left_join(d_break, d_web_prepost %>% select(caseid, contains("_pre")) %>% distinct)

#  Add covariates to stacked DV data, store as new object
d_survey = left_join(d_dv, d_covar)

#    Edit data -----------------------

#  Add group var to d_web_prepost 

d_web_prepost = d_web_prepost %>%
  mutate(
    Group = case_when(
      any_HA_total_pre   == 0 ~ "No pre-treatment visits",
      any_HA_antisem_pre == 1 ~ "Visited antisemitic sites",
      any_HA_antisem_pre == 0 ~ "Visited other H/A sites"
    )
  )

#  Prepare for preregistered balance/attrition checks

d_attrit = d_web_prepost %>%
  group_by(caseid) %>%
  summarize(total=n(), pre=as.numeric(pre_treatment)) %>%
  mutate(attrit_from_pulse = ifelse(total==pre, 1, 0)) %>%
  select(-pre) %>%
  distinct %>%
  select(caseid, attrit_from_pulse)

d_covar = left_join(d_covar, d_attrit)

d_covar = cbind(d_covar %>% select(-starts_with("attrit")), d_covar %>% select(starts_with("attrit")))

quality_check_transformations = function(x){
  x %>%
    select(caseid, z, everything()) %>%
    select(
      -starttime_w1, 
      
      #  drop a reference group for education
      -educ_Somecollege,
      
      #  drop unlogged versions of count variables (prespecified that logged is preferred)
      -(visits_HA_antisem_pre:time_HA_total_pre)
    )  %>%
    select(
      #  drop NA indicators that generate violations of full rank
      -votepref_2016_NA, -votepref_2020_NA, -relig_which_NA, -contains("_total_"), -employ_NA
    ) %>%
    mutate_at(
      vars(-caseid),
      function(x){x[is.na(x)]=mean(x, na.rm=T);x}
    )
}

d_checks = quality_check_transformations(d_covar)

#  New dataset for differential breakoff checks (reviewer comment)

d_checks_breakoff = bind_rows(
  d_break %>% quality_check_transformations %>% mutate(breakoff = 1),
  d_checks %>% mutate(breakoff = 0)
)

############################################ -----------
#  QUALITY CHECKS (Appendix 1)   -----------

#    Attrition from browsing data (Appendix 1.1, Tables SI-1 and 3) -------------

#  Test for differential attrition on average
reg_pulse_attrit_avg = lm_robust(attrit_from_pulse ~ z, data = d_covar)

#  Export table SI-1
texreg(
  reg_pulse_attrit_avg,
  booktabs = T,
  only.contents = T,
  include.rsq = F,
  include.ci = F,
  include.rmse =F,
  digits = 3,
  use.packages = F,
  stars = c(.05, .01), 
  custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
  caption.above = T,
  caption = "Difference in mean attrition from web sample by treatment group.",
  float.pos = "!h",
  label = "tab:attrit_web_avg"
) %>%
  gsub("begin.center.", "centering", .) %>%
  gsub(".end.center.", "", .) %>%
  write(
    "current_draft/appendix_figures/tab_attrit_web_avg.txt"
  )

#  Test if covariates predict attrition
regs_diff_attrit = d_covar %>%
  (function(x){
    list(
      lm(as.formula(paste("attrit_from_pulse ~", paste(names(d_covar)[6:(length(d_covar)-2)], collapse = "+"))), data = x),
      lm(as.formula(paste("attrit_from_pulse ~", paste0("z*", paste(names(d_covar)[6:(length(d_covar)-2)]), collapse = "+"))), data = x)
    )
  })

#  Export Table SI-3
anova(regs_diff_attrit[[1]], regs_diff_attrit[[2]]) %>% 
  as.data.frame %>%
  mutate_at(
    vars(Res.Df, Df), function(x) as.character(round(x))
  ) %>%
  xtable::xtable(digits = 3,
                 caption = "F-test for heterogeneity between treatment groups in attrition from web sample.",
                 caption.placement = "top",
                 label = "tab:attrit_web_F") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        include.rownames=F) %>%
  write(
    "current_draft/appendix_figures/tab_attrit_web_F.txt"
  )

#    Attrition between waves (Appendix 1.1, Tables SI-2 and 4) ------------------

#  Test for differential attrition on average
reg_att_attrit_avg = d_covar %>%
  lm_robust(attrit ~ z, data = .)

#  Export table (Table SI-2)
texreg(
  reg_att_attrit_avg,
  booktabs = T,
  only.contents = T,
  include.rsq = F,
  include.ci = F,
  include.rmse =F,
  digits = 3,
  use.packages = F,
  stars = c(.05, .01), 
  custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
  caption.above = T,
  caption = "Difference in mean attrition between survey waves by treatment group.",
  float.pos = "!h",
  label = "tab:attrit_att_avg"
) %>%
  gsub("begin.center.", "centering", .) %>%
  gsub(".end.center.", "", .) %>%
  write(
    "current_draft/appendix_figures/tab_attrit_att_avg.txt"
  )

#  Test if covariates predict attrition
attrition_on_observables_test =
  (function(x){
    list(
      lm(as.formula(paste("attrit ~", paste(names(d_covar)[6:(length(d_covar)-2)], collapse = "+"))), data = x),
      lm(as.formula(paste("attrit ~", paste0("z*", paste(names(d_covar)[6:(length(d_covar)-2)]), collapse = "+"))), data = x)
    )
  })

regs_diff_attrit = d_covar %>%
  attrition_on_observables_test()

#  Export table (Table SI-4)
anova(regs_diff_attrit[[1]], regs_diff_attrit[[2]])  %>%
  as.data.frame %>%
  mutate_at(
    vars(Res.Df, Df), function(x) as.character(round(x))
  ) %>%
  xtable::xtable(digits = 3,
                 caption = "F-test for heterogeneity between treatment groups in attrition between survey waves.",
                 caption.placement = "top",
                 label = "tab:attrit_att_F") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        include.rownames=F) %>%
  write(
    "current_draft/appendix_figures/tab_attrit_att_F.txt"
  )


#    Breakoff (Appendix 1.2, Tables SI-5 to 8) ---------------

#  Test for differential breakoff on average
reg_breakoff_avg = d_checks_breakoff %>%
  lm_robust(breakoff ~ z, data = .)

#  Export table (Table SI-5)
texreg(
  reg_breakoff_avg,
  booktabs = T,
  only.contents = T,
  include.rsq = F,
  include.ci = F,
  include.rmse =F,
  digits = 3,
  use.packages = F,
  stars = c(.05, .01), 
  custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
  caption.above = T,
  caption = "Difference in mean breakoff during wave 1 by treatment group.",
  float.pos = "!h",
  label = "tab:breakoff_att_avg"
) %>%
  gsub("begin.center.", "centering", .) %>%
  gsub(".end.center.", "", .) %>%
  write(
    "current_draft/appendix_figures/tab_breakoff_att_avg.txt"
  )

#  Test for differential breakoff by covariates
reg_breakoff_diff = 
  d_break %>% 
  select(-attrit) %>%
  quality_check_transformations() %>%
  select(-caseid) %>%
  lm(z ~ ., data = .)

reg_breakoff_1 = 
  d_break  %>% quality_check_transformations() %>%
  select(-caseid)%>%
  lm(z ~ 1, data = .)

#  Export f-test table (Table SI-6)
anova(reg_breakoff_1, reg_breakoff_diff) %>%
  as.data.frame %>%
  .[2,] %>%
  mutate_at(
    vars(Res.Df, Df), function(x) as.character(round(x))
  ) %>%
  xtable::xtable(digits = 3,
                 caption = "F-test for heterogeneity in wave 1 breakoff.",
                 caption.placement = "top",
                 label = "tab:breakoff_att_F") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        include.rownames=F) %>%
  write(
    "current_draft/appendix_figures/tab_breakoff_att_F.txt"
  )

#  Create and add differential breakoff weights (because there was breakoff; see appendix 1.2, page 4) 
fit_missing = 
  glm(
    complete ~ .,
    data = d_checks_breakoff %>%
      mutate(complete = 1-breakoff) %>% 
      select(-caseid, -breakoff, -z, -attrit, -number_of_days_pre,
             -attrit_from_pulse,
             -urbanicity_NA, -marital_NA
      ) ,
    family = binomial(link = "logit")
  )

d_checks_breakoff$weight = 1 / (predict(fit_missing, d_checks_breakoff) %>% (function(x)exp(x)/(1+exp(x)))) #(exp(predict(fit_missing, d_checks)) / 1 + exp(predict(fit_missing, d_checks)))
d_checks_breakoff$weight[is.nan(d_checks_breakoff$weight)] = 1
d_checks_breakoff$weight[d_checks_breakoff$weight > 3] = 3

d_survey = left_join(d_survey, d_checks_breakoff %>% select(caseid, weight))
d_web_daily = left_join(d_web_daily, d_checks_breakoff %>% select(caseid, weight))
d_web_prepost = left_join(d_web_prepost, d_checks_breakoff %>% select(caseid, weight))
d_web_weekly = left_join(d_web_weekly, d_checks_breakoff %>% select(caseid, weight))

#    Export covariate-by-covariate comparison tables (Tables SI-7 and 8)
tab_breakoff_means = d_break %>%
  select(-starts_with("urbanicity")) %>%
  quality_check_transformations() %>%
  
  gather(variable, value, -caseid, -z) %>%
  group_by(variable, z) %>%
  summarize(
    theMean = mean(value, na.rm=T)
  ) %>%
  spread(z, theMean) %>%
  rename(Control = "0", Treatment = "1") %>%
  filter(variable!="starttime_w1",
         !(grepl("^visits|^time", variable) & !grepl("log", variable)))

tab_breakoff = d_break %>%
  select(-starts_with("urbanicity")) %>%
  quality_check_transformations() %>%
  
  gather(variable, value, -caseid, -z) %>%
  group_by(variable) %>%
  summarize(tidy(lm_robust(value ~ z))) %>%
  filter(term=="z",
         term!="starttime_w1",
         !(grepl("^visits|^time", variable) & !grepl("log", variable)))

left_join(
  tab_breakoff_means,
  tab_breakoff
) %>%
  select(
    Covariate = variable,
    Control,
    Treatment,
    Difference = estimate,
    SE = std.error,
    t = statistic,
    p = p.value
  ) %>%
  xtable::xtable(digits = 3,
                 caption = "Difference in means tests among wave 1 breakoffs.",
                 caption.placement = "top",
                 label = "tab:breakoff") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        size = "\\scriptsize",
        include.rownames=F) %>%
  write("current_draft/appendix_figures/tab_breakoff.txt")

left_join(
  tab_breakoff_means,
  tab_breakoff
) %>%
  arrange(p.value) %>%
  select(
    Covariate = variable,
    Control,
    Treatment,
    Difference = estimate,
    SE = std.error,
    t = statistic,
    p = p.value
  ) %>%
  xtable::xtable(digits = 3,
                 caption = "Difference in means tests among wave 1 breakoffs, sorted by p-value.",
                 caption.placement = "top",
                 label = "tab:breakoff_sorted") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        size = "\\scriptsize",
        include.rownames=F) %>%
  write("current_draft/appendix_figures/tab_breakoff_sorted.txt")

#    Balance tests (Appendix 1.3, Tables SI-9 and 10) -------------

#  Create table of means by condition
tab_balance_means = d_checks %>%
  select(-marital_NA, -urbanicity_NA) %>%
  gather(variable, value, -caseid, -z) %>%
  group_by(variable, z) %>%
  summarize(
    theMean = mean(value, na.rm=T)
  ) %>%
  spread(z, theMean) %>%
  rename(Control = "0", Treatment = "1") %>%
  filter(variable!="starttime_w1",
         !(grepl("^visits|^time", variable) & !grepl("log", variable)))

tab_balance = d_checks %>%
  select(-marital_NA, -urbanicity_NA) %>%
  gather(variable, value, -caseid, -z) %>%
  group_by(variable) %>%
  summarize(tidy(lm_robust(value ~ z))) %>%
  filter(term=="z",
         term!="starttime_w1",
         !(grepl("^visits|^time", variable) & !grepl("log", variable)))

#  Export summary table (Table SI-9)
rbind(
  c(Min = min(tab_balance$statistic)),
  quantile(tab_balance$statistic, c(.025, .05, .95, .975), na.rm=T) %>% as.data.frame,
  max(tab_balance$statistic)
) %>% t %>% as_tibble %>% rename(Min="1", Max="6") %>%
  xtable::xtable(digits = 3,
                 caption = "Distribution of t-statistics from difference in means tests for covariate balance.",
                 caption.placement = "top",
                 label = "tab:balance_summary") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        include.rownames=F,
        size = "\\footnotesize") %>%
  write("current_draft/appendix_figures/tab_balance_summary.txt")

#  Export covaraite-by-covariate table (Table SI-10)
left_join(
  tab_balance_means,
  tab_balance
) %>%
  select(
    Covariate = variable,
    Control,
    Treatment,
    Difference = estimate,
    SE = std.error,
    t = statistic,
    p = p.value
  ) %>%
  xtable::xtable(digits = 3,
                 caption = "Difference in means tests for covariate balance.",
                 caption.placement = "top",
                 label = "tab:balance") %>%
  print(booktabs = T,
        caption.placement = "top",
        table.placement = "!h",
        include.rownames=F,
        size = "\\scriptsize") %>%
  write("current_draft/appendix_figures/tab_balance.txt")

#################################### -----------------------
#  SURVEY RESULTS (first section of results, Appendix 2) -----------------------
#    Data prep -------------

dv_complete = d_survey %>% 
  select(
    -contains("state"), -contains("starttime"),
    
    #  remove unlogged versions of web controls
    -visits_HA_other_pre, -visits_HA_total_pre, -visits_HA_antisem_pre,
    -time_HA_other_pre, -time_HA_total_pre, -time_HA_antisem_pre
  )

dv_complete = dv_complete[complete.cases(dv_complete),]

#    Estimates: immediate effects --------------

#  objects for use below
dep_vars = unique(dv_complete$variable)
controls = formulas = regs_w = regs = regs_bivar = regs_hirisk = regs_bivar_hirisk = list()

#  loop over DVs, fit estimates
set.seed(0); for(i in 1:length(dep_vars)){
  thevar = dep_vars[i]
  
  x_in = dv_complete %>% filter(variable == thevar) %>% select(-caseid, -z, -(variable:wave), -var)
  weight = x_in$weight
  
  #  use LASSO to choose covariates
  thex = as.matrix(x_in %>% select(-weight))
  they = dv_complete %>% filter(variable == thevar) %>% .$value
  
  lambda_optimal = cv.glmnet(
    x = thex,
    y = they
  )$lambda.min
  
  test = glmnet(thex, they, alpha = 1, lambda = lambda_optimal) 
  
  #  main estimates: store control variables and formulas
  controls[[i]] = predict(test, type = "coefficients", s = lambda_optimal)[-1,] %>% .[.!=0] %>% names
  formulas[[i]] = as.formula(paste0("value ~ z + ", paste(controls[[i]], collapse = " + ")))
  
  #  main estimates: adjusted, weighted, and unadjusted/bivariate
  regs[[i]]       = lm_robust(formulas[[i]], filter(dv_complete, variable==thevar))
  regs_w[[i]]     = lm_robust(formulas[[i]], filter(dv_complete, variable==thevar), weights = weight)
  regs_bivar[[i]] = lm_robust(value ~ z,     filter(dv_complete, variable==thevar))
  
  #  effects among pre-treatment HA visitors: remove that variable from the adjustment set
  these_controls = controls[[i]][which(!grepl("any_HA_total_pre", controls[[i]]))]
  this_formula   = as.formula(paste0("value ~ z + ", paste(these_controls, collapse = " + ")))
  
  #  effects among pre-treatment HA visitors: adjusted & unadjusted estimates
  regs_hirisk[[i]]       = lm_robust(this_formula, filter(dv_complete, variable==thevar, any_HA_total_pre==1))
  regs_bivar_hirisk[[i]] = lm_robust(value ~ z,    filter(dv_complete, variable==thevar, any_HA_total_pre==1))
}

#  name list elements after the DVs they pertain to
names(controls) = names(formulas) = names(regs) = names(regs_w) = names(regs_bivar) = names(regs_hirisk) = names(regs_bivar_hirisk) = dep_vars

#    Estimates: persistence of effects ------------

#  DEPENDS ON "controls" and "dep_vars" OBJECTS CREATED IN "Immediate effects" section above.
#  Code follows same structure as that section --- see comments there for an explanation.

persist_DVs = dep_vars[grep("therm_|discrim_|policy_index|stereo_index|contact_jew", dep_vars)]

id_no_attrit = d_covar$caseid[d_covar$attrit==0]

regs_persist = regs_persist_bivar = controls_persist = formulas_persist = regs_persist_hirisk = regs_persist_hirisk_bivar = list()

set.seed(0)
for(i in 1:length(persist_DVs)){
  thevar = persist_DVs[i]
  
  thex = as.matrix(dv_complete %>% filter(variable == thevar, caseid %in% id_no_attrit) %>% select(-caseid, -z, -(variable:wave), -var))
  they = dv_complete %>% filter(variable == thevar, caseid %in% id_no_attrit) %>% .$value
  
  lambda_optimal = cv.glmnet(
    x = thex,
    y = they
  )$lambda.min
  
  test = glmnet(thex, they, alpha = 1, lambda = lambda_optimal) 
  
  controls_persist[[i]] = predict(test, type = "coefficients", s = lambda_optimal)[-1,] %>% .[.!=0] %>% names
  formulas_persist[[i]] = as.formula(paste0("value ~ z + ", paste(controls_persist[[i]], collapse = " + ")))
  
  regs_persist[[i]] =       lm_robust(formulas_persist[[i]], filter(dv_complete, variable==thevar, caseid %in% id_no_attrit))
  regs_persist_bivar[[i]] = lm_robust(value ~ z,             filter(dv_complete, variable==thevar, caseid %in% id_no_attrit))
  
  these_controls = controls_persist[[i]][which(!grepl("any_HA_total_pre|troll", controls_persist[[i]]))]
  this_formula   = as.formula(paste0("value ~ z + ", paste(these_controls, collapse = " + ")))
  
  regs_persist_hirisk[[i]] =       lm_robust(this_formula, filter(dv_complete, variable==thevar, caseid %in% id_no_attrit, any_HA_total_pre == 1))
  regs_persist_hirisk_bivar[[i]] = lm_robust(value ~ z,             filter(dv_complete, variable==thevar, caseid %in% id_no_attrit, any_HA_total_pre == 1))
}

names(regs_persist) = names(regs_persist_bivar) = names(regs_persist_hirisk) = names(regs_persist_hirisk_bivar) = persist_DVs


#    Figure 1a: immediate effects -------------------

tab_plot_main = 
  bind_rows(
    map_df(regs, tidy, .id="variable") %>% mutate(Model="Covariate-adjusted"),
    map_df(regs_bivar, tidy, .id="variable") %>% mutate(Model="Bivariate")
  ) %>% 
  filter(term=="z") %>%
  left_join(codebook) %>%
  mutate(wave = paste("Wave", gsub(".+_w", "", variable)),
         Model = factor(Model, c("Covariate-adjusted", "Bivariate")),
         label = ifelse(hypothesis_number %in% c("H2", "H3"), gsub(" ", "\n", hypothesis_label), label),
         label = factor(label),
         label = relevel(label, "Index")) %>%
  arrange(hypothesis_number)

tab_plot_hirisk = 
  bind_rows(
    map_df(regs_hirisk, tidy, .id="variable") %>% mutate(Model="Covariate-adjusted"),
    map_df(regs_bivar_hirisk, tidy, .id="variable") %>% mutate(Model="Bivariate")
  ) %>% 
  filter(term=="z") %>%
  left_join(codebook) %>%
  mutate(wave = paste("Wave", gsub(".+_w", "", variable)),
         Model = factor(Model, c("Covariate-adjusted", "Bivariate")),
         label = ifelse(hypothesis_number %in% c("H2", "H3"), gsub(" ", "\n", hypothesis_label), label),
         label = factor(label),
         label = relevel(label, "Index")) %>%
  arrange(hypothesis_number)

tab_plot = 
  bind_rows(
    tab_plot_main %>% mutate(subset = "Full sample"),
    tab_plot_hirisk %>% mutate(subset = "Visited H/A site pre-treatment")
  ) %>%
  mutate(Hypothesis = paste0(hypothesis_number, ". ", hypothesis_label) %>% factor(rev(unique(.))))

g = tab_plot %>%
  filter(grepl("Sympathetic|Feeling|Discrimination|Index", label),
         Model == "Covariate-adjusted",
         subset == "Full sample",
         wave == "Wave 1") %>%
  ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = Hypothesis, color = subset, shape = subset, fill = subset)) +
  geom_vline(aes(xintercept=0), color="white") +
  geom_vline(aes(xintercept = 0), lty = 2, color = "gray50", size = .5) +
  geom_errorbarh(height = 0, position = position_dodgev(height=-.5), size = .25) +
  geom_errorbarh(aes(xmin = conf.low+std.error*.315, xmax = conf.high-std.error*.315),
                 height = 0, position = position_dodgev(height=-.5), size = 1) +
  geom_point(position = position_dodgev(height=-.5), size = 2.5) +
  theme_allPlots(8) +
  theme(strip.placement = "outside",plot.tag = element_text(margin = margin(0,0,-10,0)),
        strip.background = element_blank(),
        panel.spacing.y = unit(.75, "cm"),
        axis.title = element_text(size = 8),
        axis.title.y = element_blank(),
        axis.title.x = element_text(margin = margin(5,0,-12,0)),
        axis.text.y = element_text(hjust = 0),
        axis.ticks.y = element_blank(),
        plot.title = element_text(margin = margin(0,0,-12,0)),
        legend.title = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        legend.key = element_blank(),
        legend.key.width = unit(1, "cm"),
        panel.grid.major.y = element_blank()) +
  scale_color_manual(values = c("gray5", "gray50")) +
  scale_fill_manual(values = c("gray5", "gray50")) +
  scale_shape_manual(values = c(21, 22)) +
  scale_x_continuous(breaks = (-1:7)/20) +
  coord_cartesian(xlim = c(-.05, .35)) +
  labs(x = "Treatment effect", y = "Dependent variable")

ggsave("current_draft/main_figures/fig_attitudes_maintext.pdf", g, "pdf", width = 6, height = 1.5)

#    Figure 1b: persistence ------------------

tab_plot_persist_main = 
  bind_rows(
    map_df(regs_persist[-grep("contact", names(regs_persist))], tidy, .id="variable") %>% mutate(Model="Covariate-adjusted"),
    map_df(regs_persist_bivar[-grep("contact", names(regs_persist))], tidy, .id="variable") %>% mutate(Model="Bivariate")
  ) %>% 
  filter(term=="z") %>%
  left_join(codebook) %>%
  mutate(wave = gsub(".+_w", "", variable),
         Model = factor(Model, c("Covariate-adjusted", "Bivariate")),
         label = ifelse(hypothesis_number %in% c("H2", "H3"), gsub(" ", "\n", hypothesis_label), label),
         label = factor(label),
         label = relevel(label, "Index")) %>%
  arrange(hypothesis_number)

tab_plot_persist_hirisk = 
  bind_rows(
    map_df(regs_persist_hirisk[-grep("contact", names(regs_persist))], tidy, .id="variable") %>% mutate(Model="Covariate-adjusted"),
    map_df(regs_persist_hirisk_bivar[-grep("contact", names(regs_persist))], tidy, .id="variable") %>% mutate(Model="Bivariate")
  ) %>% 
  filter(term=="z") %>%
  left_join(codebook) %>%
  mutate(wave = gsub(".+_w", "", variable),
         Model = factor(Model, c("Covariate-adjusted", "Bivariate")),
         label = ifelse(hypothesis_number %in% c("H2", "H3"), gsub(" ", "\n", hypothesis_label), label),
         label = factor(label),
         label = relevel(label, "Index")) %>%
  arrange(hypothesis_number)

tab_plot_persist = bind_rows(
  tab_plot_persist_main %>% mutate(Facet = "Full sample"),
  tab_plot_persist_hirisk %>% mutate(Facet = "Visited H/A site pre-treatment")
) 

g = tab_plot_persist %>%
  filter(
    Model == "Covariate-adjusted",
    Facet == "Full sample",
    wave == 2) %>%
  mutate(Hypothesis = paste0(hypothesis_number, ". ", hypothesis_label)) %>%
  ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = Hypothesis)) +
  geom_vline(aes(xintercept=0), color="white") +
  geom_vline(aes(xintercept = 0), lty = 2, color = "gray50", size = .5) +
  geom_errorbarh(height = 0, position = position_dodgev(height=-.5), size = .25) +
  geom_errorbarh(aes(xmin = conf.low+std.error*.315, xmax = conf.high-std.error*.315),
                 height = 0, position = position_dodgev(height=-.5), size = 1) +
  geom_point(position = position_dodgev(height=-.5), size = 2.5) +
  theme_allPlots(8) +
  theme(strip.placement = "outside",plot.tag = element_text(margin = margin(0,0,-10,0)),
        strip.background = element_blank(),
        panel.spacing.y = unit(.75, "cm"),
        axis.title = element_text(size = 8),
        axis.title.y = element_blank(),
        axis.title.x = element_text(margin = margin(5,0,-12,0)),
        axis.text.y = element_text(hjust = 0),
        axis.ticks.y = element_blank(),
        plot.title = element_text(margin = margin(0,0,-12,0)),
        legend.title = element_blank(),
        legend.position = "none",
        legend.background = element_blank(),
        legend.key = element_blank(),
        legend.key.width = unit(1, "cm"),
        panel.grid.major.y = element_blank()) +
  scale_color_manual(values = c("gray5", "gray50")) +
  scale_fill_manual(values = c("gray5", "gray50")) +
  scale_shape_manual(values = c(21, 22)) +
  scale_x_continuous(breaks = (-1:7)/20) +
  scale_y_discrete(limits = rev) +
  coord_cartesian(xlim = c(-.05, .35)) +
  labs(x = "Treatment effect", y = "Dependent variable"); g

ggsave("current_draft/main_figures/fig_attitude_persistence.pdf", g, "pdf", width = 6, height = 1.25)


#    Appendix 2.1-2.3: immediate effects (Tables SI-11, 12, 15-17) --------------------

makeTab_attitude_w1 = function(theRegs, theCaption, theLabel){
  
  model_names = dv_complete$hypothesis_label[match(names(theRegs), dv_complete$variable)]
  
  out = texreg(theRegs,
               booktabs = T,
               only.contents = T,
               include.rsq = F,
               include.ci = F,
               include.rmse =F,
               digits = 3,
               use.packages = F,
               stars = c(.10, .02),  # for one-sided p-values
               custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
               custom.model.names = model_names,
               #custom.header = list(`All H/A sites`=1:3, `Antisemitic sites`=4:6),
               #custom.model.names = rep(c("Any visit", "Total visits", "Time spent"), 2),
               caption.above = T,
               caption = theCaption,
               label = theLabel, float.pos = "!h"
  )
  
  out = strsplit(out, "\\n")[[1]]
  
  #  remove stars from constant row
  out = ifelse(grepl("Constant", out), gsub("\\*", "", out), out)
  
  #  display one-sided significance levels
  out = gsub("<0.02", "<0.01", out)
  out = gsub("<0.1", "<0.05", out)
  
  #  add space between rows of coefs
  out = gsub("Treatment", "\\\\\\\\[-1.5ex] Treatment", out)
  
  out = gsub("begin.center.", "centering", out)
  out = gsub(".end.center.", "", out)
  
  #out = gsub("bottomrule", "bottomrule\\\\\\\\[-4ex]", out)
  
  out = ifelse(grepl("Sympathetic", out), 
               paste(
                 gsub(" reaction| thermometer |perceptions | solutions|Stereotypes ", "", out),
                 gsub("Sympathetic |Feeling |Discrimination |Policy ", "", out)),
               out
  )
  
  #out = gsub("figure", "subfigure", out)
  
  out
}

makeTab_attitude_w1(
  regs[grep("emo_sympathetic_w1|therm_jew_w1|discrim_jew_w1|policy_index_w1|stereo_index_w1", names(regs))],
  "Immediate effect on attitudes, full sample.",
  "tab:attitude_w1_full"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_w1_full.txt"
  )

makeTab_attitude_w1(
  regs_w[grep("emo_sympathetic_w1|therm_jew_w1|discrim_jew_w1|policy_index_w1|stereo_index_w1", names(regs))],
  "Immediate effect on attitudes, full sample.",
  "tab:attitude_w1_weighted"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_w1_weighted.txt"
  )

makeTab_attitude_w1(
  regs_hirisk[grep("emo_sympathetic_w1|therm_jew_w1|discrim_jew_w1|policy_index_w1|stereo_index_w1", names(regs))],
  "Immediate effect on attitudes, visited H/A sites pre-treatment.",
  "tab:attitude_w1_atrisk"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_w1_atrisk.txt"
  )

makeTab_attitude_w1(
  regs_bivar[grep("emo_sympathetic_w1|therm_jew_w1|discrim_jew_w1|policy_index_w1|stereo_index_w1", names(regs))],
  "Immediate effect on attitudes, no covariate adjustment, full sample.",
  "tab:attitude_w1_full_bivar"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_w1_full_bivar.txt"
  )

makeTab_attitude_w1(
  regs_bivar_hirisk[grep("emo_sympathetic_w1|therm_jew_w1|discrim_jew_w1|policy_index_w1|stereo_index_w1", names(regs))],
  "Immediate effect on attitudes, no covariate adjustment, visited H/A sites pre-treatment.",
  "tab:attitude_w1_atrisk_bivar"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_w1_atrisk_bivar.txt"
  )



#    Appendix 2.1-2.3: persistence (Tables SI-13, 14, 18, 19) ---------

makeTab_persist = function(theRegs, theCaption, theLabel){
  
  out = texreg(theRegs,
               booktabs = T,
               only.contents = T,
               include.rsq = F,
               include.ci = F,
               include.rmse =F,
               digits = 3,
               use.packages = F,
               stars = c(.10, .02),  # for one-sided p-values
               custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
               custom.header = list(`Feeling thermometer`=1:2, `Discrimination perceptions`=3:4, `Policy solutions`=5:6, Stereotypes=7:8),
               custom.model.names = rep(c("Wave 1", "Wave 2"), 4),
               caption.above = T,
               caption = theCaption,
               label = theLabel, float.pos = "!h"
  )
  
  out = strsplit(out, "\\n")[[1]]
  
  #  remove stars from constant row
  out = ifelse(grepl("Constant", out), gsub("\\*", "", out), out)
  
  #  display one-sided significance levels
  out = gsub("<0.02", "<0.01", out)
  out = gsub("<0.1", "<0.05", out)
  
  #  add space between rows of coefs
  out = gsub("Treatment", "\\\\\\\\[-1.5ex] Treatment", out)
  
  out = gsub("begin.center.", "centering", out)
  out = gsub(".end.center.", "", out)
  
  # out = gsub("bottomrule", "bottomrule\\\\\\\\[-4ex]", out)
  
  out = ifelse(grepl("Feeling", out), 
               paste(
                 gsub(" thermometer| perceptions|Policy solutions|Stereotypes", "", out),
                 gsub("Feeling |Discrimination ", "", out)),
               out
  )
  
  #out = gsub("figure", "subfigure", out)
  
  out
}

makeTab_persist(
  regs_persist[-grep("contact", names(regs_persist))],
  "Persistence of attitudinal effects, all wave 2 subjects.",
  "tab:attitude_persist_full"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_persist_full.txt"
  )

makeTab_persist(
  regs_persist_hirisk[-grep("contact", names(regs_persist_hirisk))],
  "Persistence of attitudinal effects, visited H/A sites pre-treatment and took wave 2 survey.",
  "tab:attitude_persist_atrisk"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_persist_atrisk.txt"
  )

makeTab_persist(
  regs_persist_bivar[-grep("contact", names(regs_persist_bivar))],
  "Persistence of attitudinal effects, no covariate adjustment, all wave 2 subjects.",
  "tab:attitude_persist_full_bivar"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_persist_full_bivar.txt"
  )

makeTab_persist(
  regs_persist_hirisk_bivar[-grep("contact", names(regs_persist_hirisk_bivar))],
  "Persistence of attitudinal effects, no covariate adjustment, visited H/A sites pre-treatment and took wave 2 survey.",
  "tab:attitude_persist_atrisk_bivar"
) %>%
  write(
    "current_draft/appendix_figures/tab_att_persist_atrisk_bivar.txt"
  )

#    Appendix 2.4: index components (Figure SI-1) ------------------------

#  Depends on tab_plot object created for Figure 1a

makePlot_all_attitude_vars = function(x){
  x %>%
    filter(wave=="Wave 1") %>%
    ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = label, color = Model, shape = Model)) +
    geom_vline(aes(xintercept=0), color="white") +
    geom_vline(aes(xintercept = 0), lty = 2, color = "gray50", size = .5) +
    geom_errorbarh(height = 0, position = position_dodgev(height=-.6)) +
    geom_errorbarh(aes(xmin = conf.low+std.error*.315, xmax = conf.high-std.error*.315),
                   height = 0, position = position_dodgev(height=-.6), size = 1.25) +
    geom_point(position = position_dodgev(height=-.6), size = 3, fill = "white", color = "gray5") +

    facet_grid(hypothesis_number ~ ., switch = "y", scales = "free_y", space = "free_y") +
    theme_allPlots() +
    theme(strip.placement = "outside",
          strip.background = element_blank(),
          axis.title = element_text(size = 9),
          legend.title = element_blank(),
          legend.position = "bottom",
          legend.background = element_blank(),
          legend.key = element_blank(),
          legend.key.width = unit(1, "cm")) +
    scale_color_manual(values = c("gray5", "gray50")) +
    scale_shape_manual(values = c(16, 21)) +
    coord_cartesian(xlim = c(-.1, .2)) +
    labs(x = "ATE estimate", y = "Dependent variable")
}

g = tab_plot %>%
  filter(hypothesis_number %in% c("H4", "H5")) %>%
  mutate(Hypothesis = gsub(" solutions", "\nsolutions", Hypothesis)) %>%
  mutate(Hypothesis = factor(Hypothesis, unique(Hypothesis))) %>%
  makePlot_all_attitude_vars() +
  facet_grid(Hypothesis ~ subset, scales = "free_y", space = "free_y", switch = "y")

ggsave("current_draft/appendix_figures/fig_attitude_index_components.pdf", g, "pdf", width = 6.5, height = 6)

#    Appendix 2.4: contact hypothesis (Table SI-20) ----------------

#  copy-pasted out of function above, and modified to suit

out = texreg(c(regs_persist[9], regs_persist_bivar[9], regs_persist_hirisk[9], regs_persist_hirisk_bivar[9]),
             booktabs = T,
             only.contents = T,
             include.rsq = F,
             include.ci = F,
             include.rmse =F,
             digits = 3,
             use.packages = F,
             stars = c(.05, .01),  # for one-sided p-values
             custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
             custom.header = list(`Full sample`=1:2, `Visited H/A sites pre-treatment`=3:4),
             custom.model.names = rep(c("Adjusted", "Bivariate"), 2),
             caption.above = T,
             caption = "Effect on contact with Jews, wave 2.",
             label = "tab:contact", 
             float.pos = "!h"
)

out = strsplit(out, "\\n")[[1]]

#  remove stars from constant row
out = ifelse(grepl("Constant", out), gsub("\\*", "", out), out)

#  add space between rows of coefs
out = gsub("Treatment", "\\\\\\\\[-1.5ex] Treatment", out)

out = gsub("begin.center.", "centering", out)
out = gsub(".end.center.", "", out)

#out = gsub("bottomrule", "bottomrule\\\\\\\\[-4ex]", out)

out = ifelse(grepl("Visited", out), 
             paste(
               gsub("Full sample| pre-treatment", "", out),
               gsub("Visited H/A sites ", "", out)),
             out
)

write(out, "current_draft/appendix_figures/tab_contact.txt")

#    Appendix 2.5: hetero effects (Tables SI-21 to 27) ----------------------

#  DEPENDS ON "controls" and "dep_vars" OBJECTS CREATED IN MAIN ANALYSIS

#  get DVs and same controls from main analysis
hetero_DVs = dep_vars[grep("emo_|therm_|discrim_|stereo_index|policy_index", dep_vars)]
hetero_controls = controls[grep("emo_|therm_|discrim_|stereo_index|policy_index", names(controls))]

#  function to estimate hetero effects & create tables
run_hetero_effects = function(hetero_variable, hetero_label){
  
  #  lists to store regressions
  regs = list()
  regs_bivar = list()
  
  #  loop over DVs, fit regression
  for(i in 1:length(hetero_DVs)){
    
    #  extract control variables
    these_controls = hetero_controls[[i]][which(!grepl(hetero_variable, hetero_controls[[i]]))]
    
    #  create regression formulas
    formula_bivar = paste("value ~ z * ", hetero_variable)
    formula_adjusted = as.formula(paste(formula_bivar, "+", paste(these_controls, collapse = "+")))
    formula_bivar = as.formula(formula_bivar)
    
    #  fit regressions
    regs[[i]] = lm_robust(formula_adjusted, data = filter(dv_complete, variable==hetero_DVs[i]))
    regs_bivar[[i]] = lm_robust(formula_bivar, data = filter(dv_complete, variable==hetero_DVs[i]))
  }
  
  #  name list entries after pertinent DV
  names(regs) = names(regs_bivar) = hetero_DVs
  
  #  generate list of covariates to exclude from the regression tables
  omit_controls = names(d_covar)[which(names(d_covar) != hetero_variable & !grepl("weight|^z$|caseid|starttime", names(d_covar)))] %>% paste0(collapse="|")
  
  #  create regression table
  reg_tab = texreg(list(regs_bivar$emo_sympathetic_w1, regs$emo_sympathetic_w1, regs_bivar$therm_jew_w1, regs$therm_jew_w1, regs_bivar$discrim_jew_w1, regs$discrim_jew_w1, regs_bivar$policy_index_w1, regs$policy_index_w1, regs_bivar$stereo_index_w1, regs$stereo_index_w1),
                   custom.gof.rows = list(Controls = rep(c("No", "Yes"), 5)),
                   custom.coef.names = c("Constant", "Treatment", hetero_label, paste("Treatment TIMES", hetero_label)),
                   omit.coef = omit_controls,
                   booktabs = T,
                   only.contents = T,
                   include.rsq = F,
                   include.ci = F,
                   include.rmse =F,
                   digits = 3,
                   use.packages = F) %>%
    gsub(".+Constant", "Constant", .) %>%
    gsub(".multicolumn.+", "", .) %>%
    gsub("TIMES", "$\\\\times$", .)
  
  #  output regression objects & table
  list(adjusted = regs, bivariate = regs_bivar, reg_tab = reg_tab)  
}

#  run all regressions and create table
regs_hetero_CRT     = run_hetero_effects("crt_total",     "Cognitive reflection")
regs_hetero_empathy = run_hetero_effects("empathy_index", "Perspective-taking")
regs_hetero_conspir = run_hetero_effects("conspir_index", "Conspiratorial beliefs")
regs_hetero_knowl   = run_hetero_effects("knowl_total",   "Political knowledge")
regs_hetero_contact = run_hetero_effects("contact_jew",   "Contact with Jews")
regs_hetero_pid7    = run_hetero_effects("pid7",          "Party ID")
regs_hetero_anyHA   = run_hetero_effects("any_HA_total_pre",        "Visited a H/A site")
regs_hetero_numHA   = run_hetero_effects("visits_HA_total_log_pre", "Number of visits")
regs_hetero_timeHA  = run_hetero_effects("time_HA_total_log_pre",   "Time spent on H/A sites")

#  export Tables SI-21 to 27
write(regs_hetero_CRT$reg_tab,     "current_draft/appendix_figures/tab_hetero_CRT.txt")
write(regs_hetero_empathy$reg_tab, "current_draft/appendix_figures/tab_hetero_empathy.txt")
write(regs_hetero_conspir$reg_tab, "current_draft/appendix_figures/tab_hetero_conspir.txt")
write(regs_hetero_knowl$reg_tab,   "current_draft/appendix_figures/tab_hetero_knowl.txt")
write(regs_hetero_contact$reg_tab, "current_draft/appendix_figures/tab_hetero_contact.txt")
write(regs_hetero_anyHA$reg_tab,   "current_draft/appendix_figures/tab_hetero_anyHA.txt")
write(regs_hetero_numHA$reg_tab,   "current_draft/appendix_figures/tab_hetero_numHA.txt")
write(regs_hetero_timeHA$reg_tab,  "current_draft/appendix_figures/tab_hetero_timeHA.txt")
write(regs_hetero_pid7$reg_tab,    "current_draft/appendix_figures/tab_hetero_pid7.txt")

#    Appendix 2.6: open-ended responses (Figures SI-2 and 3) ------------------

#  Combine responses to second prompt
d_open = d_open %>%
  mutate(
    know_jew = recode(know_jew, ` 1`="Yes", ` 2`="No"),
    know_jew_open = ifelse(know_jew == "Yes", yes_kn_jew_antisem, no_kn_jew_antisem)
  )

#  Figure SI-2: word count distribution, first prompt
pdf("current_draft/appendix_figures/fig_word_count.pdf", width = 5, height = 2.5)
par(mar=c(5,4,0,0)+0.1)
d_open %>% .$outgroup_exp %>% str_count(" ") %>% hist(breaks=100, main = NULL, xlab="Number of words", freq=F)
dev.off()

#  Figure SI-3: word count distribution, second prompt
pdf("current_draft/appendix_figures/fig_word_count_know.pdf", width = 5, height = 2.5)
par(mar=c(5,4,0,0)+0.1)
d_open %>% .$know_jew_open %>% str_count(" ") %>% hist(breaks=100, main = NULL, xlab="Number of words", freq=F)
dev.off()

#  Text: mean/median number of words, first prompt
d_open %>% .$outgroup_exp %>% str_count(" ") %>% mean()
d_open %>% .$outgroup_exp %>% str_count(" ") %>% median()

#  Text: mean/median number of words, second prompt
d_open %>% .$know_jew_open %>% str_count(" ") %>% mean()
d_open %>% .$know_jew_open %>% str_count(" ") %>% median()

#################################### -----------------------
#  BROWSING RESULTS (second section of results and Appendix 3)   -----------------------
#    Data prep ------------
#       Exclude subjects with no post-treatment data (per attrition test in Appendix 1.1) ----------------

any_post = d_web_prepost %>% filter(!pre_treatment) %>% .$caseid

d_daily   = d_web_daily %>% filter(caseid %in% any_post)
d_weekly  = d_web_weekly %>% filter(caseid %in% any_post)
d_prepost = d_web_prepost %>% filter(caseid %in% any_post)

#       Trim daily/weekly data to a time window everyone has (for use in Appendix 3.3) -------------

d_daily = d_daily %>%
  mutate(
    days_since = ifelse(pre_treatment, -1, days_since)
  ) %>%
  filter(days_since <= 14)

d_weekly = d_weekly %>%
  filter(weeks_since <= 3)

#    Table 1: Summary of browsing data ----------------

out = 
  bind_rows(
    d_prepost,
    d_prepost %>% mutate(z=-1)
  )  %>%
  filter(pre_treatment==F) %>%
  (function(x){bind_rows(
    x %>% mutate(Group = "All subjects"),
    x %>% mutate(
      Group = case_when(
        any_HA_total_pre == 0 ~ "No pre-treatment H/A site visits",
        any_HA_antisem_pre == 1 ~ "zINDENT Antisemitic sites",
        any_HA_antisem_pre == 0 ~ "zINDENT Other H/A sites"
      )
    ),
    x %>% filter(any_HA_total_pre == 1) %>%
      mutate(Group = "Visited H/A site pre-treatment")
  )}) %>%
  group_by(Group, z) %>%
  summarize(
    N=n(), 
    Any    = mean(any_HA_total), 
    Visits = mean(visits_HA_total), 
    Time   = mean(time_HA_total), 
    Any_AS = mean(any_HA_antisem), 
    mean(visits_HA_antisem), 
    mean(time_HA_antisem)
  ) %>%
  rename(Condition=z) %>%
  mutate(Condition = recode(Condition, `-1`="All respondents", `0`="Control", `1`="Treatment"),
         N = as.character(N),
         Group = ifelse(Condition=="All respondents", Group, ""),
         Any = format_num(Any, 3),
         Any_AS = format_num(Any_AS, 3)) %>%
  xtable() %>%
  print(
    include.rownames = F, include.colnames = F, only.contents=T,
    booktabs = T, hline.after = nrow(.)
  )

out = strsplit(out, "\n")[[1]]

out = ifelse(grepl("visit|sites", out, ignore.case=T), paste("\\\\[-1.5ex] ", out), out)

out = gsub("zINDENT", "~~~~~", out)

write(out,
    "current_draft/main_figures/tab_web_summary.txt"
)

#    Estimate treatment effects -----------
#       Calculate effects for full sample ----------------

#  Note: the outlier's respondent ID is 81375753. See Appendix 3.4.

#  Dataset with post-treatment browsing data only + none missing
d_prepost_complete =
  d_prepost %>%
  filter(!pre_treatment) %>%
  select(-pre_treatment) %>%
  .[complete.cases(.),]

#  Split off the weight variable (for differential attrition weights)
theweight = d_prepost_complete$weight
d_prepost_complete = d_prepost_complete %>% select(-weight)

#  Covariate object for LASSO
thex = as.matrix(d_prepost_complete %>% select(-caseid, -z, -(visits_HA_antisem:time_HA_total_log), -Group, -starttime_w1, -number_of_days)) %>% apply(2, as.numeric)

#  Specify outcome variables to use
outcome_vars_web = c("any_HA_total", "visits_HA_total_log", "time_HA_total_log", 
                     "any_HA_antisem", "visits_HA_antisem_log", "time_HA_antisem_log", 
                     "any_HA_other", "visits_HA_other_log", "time_HA_other_log")

#  Create objects to store results
controls_web = formulas_web = regs_web = regs_web_w = regs_web_outlier = regs_web_bivar = daily_web = daily_web_bivar = weekly_web = weekly_web_bivar = list()

#  Loop over specified outcome variables, store results
set.seed(0); for(i in 1:length(outcome_vars_web)){
  
  #  Use LASSO to choose covariates
  they = as.data.frame(d_prepost_complete)[,outcome_vars_web[i]]
  
  lambda_optimal = cv.glmnet(
    x = thex,
    y = they
  )$lambda.min
  
  test = glmnet(thex, they, alpha = 1, lambda = lambda_optimal) 
  
  #  Store covariates & regression formula
  controls_web[[i]] = predict(test, type = "coefficients", s = lambda_optimal)[-1,] %>% .[.!=0] %>% names
  formulas_web[[i]] = as.formula(paste0(outcome_vars_web[i], " ~ z + ", paste(controls_web[[i]], collapse = " + ")))
  
  #  Fit main regressions
  regs_web[[i]] = lm_robust(formulas_web[[i]], d_prepost_complete)
  regs_web_w[[i]] = lm_robust(formulas_web[[i]], d_prepost_complete, weights = theweight)
  regs_web_outlier[[i]] = lm_robust(formulas_web[[i]], d_prepost_complete %>% filter(caseid != 81375753))
  regs_web_bivar[[i]] = lm_robust(as.formula(paste0(outcome_vars_web[i], " ~ z")), d_prepost_complete)
  
  #  Fit daily and weekly regressions
  #  (formula code is identical to what goes into formulas_web --- could not get it to pull from that object for whatever reason)
  daily_web[[i]] = d_daily %>%
    group_by(days_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web[i], " ~ z + ", paste(controls_web[[i]], collapse = " + ")))))
    )
  daily_web_bivar[[i]] = d_daily %>%
    group_by(days_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web[i], " ~ z"))))
    )
  weekly_web[[i]] = d_weekly %>%
    group_by(weeks_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web[i], " ~ z + ", paste(controls_web[[i]], collapse = " + ")))))
    )
  weekly_web_bivar[[i]] = d_weekly %>%
    group_by(weeks_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web[i], " ~ z"))))
    )
}

#  Name list entries after the DV they correspond to
names(controls_web) = names(formulas_web) = 
  names(regs_web) = names(regs_web_w) = 
  names(regs_web_bivar) = names(regs_web_outlier) =
  names(daily_web) = names(daily_web_bivar) = 
  names(weekly_web) = names(weekly_web_bivar) = 
  outcome_vars_web

#       Calculate effects on those with pre-treatment HA visits ---------------------

#  This section follows the same structure as the full-sample effects above.
#  See comments there for an explanation of each component of the following code.

d_prepost_complete_hirisk =
  d_prepost %>%
  filter(!pre_treatment,
         any_HA_total_pre == 1) %>%
  select(-pre_treatment, 
         -contains("state"), 
         -any_HA_total_pre, -any_HA_other_pre
         ) %>%
  .[complete.cases(.),] 

thex = as.matrix(d_prepost_complete_hirisk %>% select(-caseid, -z, -(visits_HA_antisem:time_HA_total_log), -Group, -starttime_w1, -number_of_days)) %>% apply(2, as.numeric)

outcome_vars_web_hirisk = c("any_HA_total", "visits_HA_total_log", "time_HA_total_log", 
                            "any_HA_antisem", "visits_HA_antisem_log", "time_HA_antisem_log", 
                            "any_HA_other", "visits_HA_other_log", "time_HA_other_log")

controls_web_hirisk = formulas_web_hirisk = regs_web_hirisk = regs_web_hirisk_outlier = regs_web_hirisk_bivar = daily_web_hirisk = daily_web_hirisk_bivar = weekly_web_hirisk = weekly_web_hirisk_bivar = list()

set.seed(0); for(i in 1:length(outcome_vars_web_hirisk)){
  
  they = as.data.frame(d_prepost_complete_hirisk)[,outcome_vars_web_hirisk[i]]
  
  lambda_optimal = cv.glmnet(
    x = thex,
    y = they
  )$lambda.min
  
  test = glmnet(thex, they, alpha = 1, lambda = lambda_optimal) 
  
  controls_web_hirisk[[i]] = predict(test, type = "coefficients", s = lambda_optimal)[-1,] %>% .[.!=0] %>% names
  formulas_web_hirisk[[i]] = as.formula(paste0(outcome_vars_web_hirisk[i], " ~ z + ", paste(controls_web_hirisk[[i]], collapse = " + ")))
  regs_web_hirisk[[i]] = lm_robust(formulas_web_hirisk[[i]], d_prepost_complete_hirisk)
  regs_web_hirisk_outlier[[i]] = lm_robust(formulas_web_hirisk[[i]], d_prepost_complete_hirisk %>% filter(caseid != 81375753))
  regs_web_hirisk_bivar[[i]] = lm_robust(as.formula(paste0(outcome_vars_web_hirisk[i], " ~ z")), d_prepost_complete_hirisk)
  
  # bizarrely, the below will not accept anything out of the formulas_web_hirisk list,
  # but will accept the exact same code that creates it just fine
  daily_web_hirisk[[i]] = d_daily %>%
    filter(any_HA_total_pre==1) %>%
    group_by(days_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web_hirisk[i], " ~ z + ", paste(controls_web_hirisk[[i]], collapse = " + ")))))
    )
  daily_web_hirisk_bivar[[i]] = d_daily %>%
    filter(any_HA_total_pre==1) %>%
    group_by(days_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web_hirisk[i], " ~ z"))))
    )
  
  weekly_web_hirisk[[i]] = d_weekly %>%
    filter(any_HA_total_pre==1) %>%
    group_by(weeks_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web_hirisk[i], " ~ z + ", paste(controls_web_hirisk[[i]], collapse = " + ")))))
    )
  weekly_web_hirisk_bivar[[i]] = d_weekly %>%
    filter(any_HA_total_pre==1) %>%
    group_by(weeks_since) %>%
    summarize(
      tidy(lm_robust(as.formula(paste0(outcome_vars_web_hirisk[i], " ~ z"))))
    )
}

names(controls_web_hirisk) = names(formulas_web_hirisk) = 
  names(regs_web_hirisk) = names(regs_web_hirisk_bivar) = names(regs_web_hirisk_outlier) =
  names(daily_web_hirisk) = names(daily_web_hirisk_bivar) = 
  names(weekly_web_hirisk) = names(weekly_web_hirisk_bivar) = 
  outcome_vars_web_hirisk


#       Put estimates into data frames for plotting ------------------

tab_plot_web = 
  bind_rows(
    map_df(regs_web, tidy, .id="variable") %>% mutate(Model="Covariate-adjusted", subset = "Full sample"),
    map_df(regs_web_bivar, tidy, .id="variable") %>% mutate(Model="Bivariate", subset = "Full sample"),
    map_df(regs_web_hirisk, tidy, .id="variable") %>% mutate(Model="Covariate-adjusted", subset = "Visited H/A site pre-treatment"),
    map_df(regs_web_hirisk_bivar, tidy, .id="variable") %>% mutate(Model="Bivariate", subset = "Visited H/A site pre-treatment")
  ) %>% 
  group_by() %>%
  filter(term=="z") %>%
  mutate(
    var = gsub("_antisem|_total", "", variable),
    Variable = recode_factor(
      var,
      any_HA = "Any visit (binary)",
      visits_HA_log = "Number of visits (logged)",
      time_HA_log = "Time spent (logged)"
    ),
    Category = case_when(
      grepl("antisem", variable) ~ "Antisemitic sites",
      grepl("total", variable) ~ "All H/A sites",
      grepl("other", variable) ~ "All other H/A sites"
    ),
    Model = factor(Model, unique(Model)),
    Label = recode_factor(
      variable,
      any_HA_total = "Visited any H/A\nsite (binary)",
      visits_HA_total_log = "Number of visits\nto H/A sites (log)",
      time_HA_total_log = "Time spent on\nH/A  sites (log)",
      
      any_HA_other = "Visited other H/A\nsites (binary)",
      visits_HA_other_log = "Number of visits\nto other sites (log)",
      time_HA_other_log = "Time spent on\nother sites (log)",
      
      any_HA_antisem = "Visited an antisemitic\nsite (binary)",
      visits_HA_antisem_log = "Number of visits to\nantisemitic sites (log)",
      time_HA_antisem_log = "Time spent on anti-\nsemitic sites (log)"
      
      # any_HA_antisem = "Visited an anti-\nsemitic site\n(binary)",
      # visits_HA_antisem_log = "Number of visits\nto antisemitic\nsites (log)",
      # time_HA_antisem_log = "Time spent on\nantisemitic\nsites (log)"
    ),
    
    Label = factor(Label, rev(levels(Label))),
    
    Label_2 = gsub_factor("Visited.+", "Any visit\n(binary)", Label) %>%
              gsub_factor("Number.+", "Number of\nvisits (log)", .) %>%
              gsub_factor("Time spent.+", "Time spent\n(log)", .),
    
    Category = factor(Category, rev(unique(Category)))
  ) 

#    Main results: pre/post effects ------------------
#       Main text: Figure 2 ------------

#  Function to create plot
makePlot_web_main = function(x, x_limit = .15){
  x %>%
    ggplot(aes(x = estimate, xmin = conf.low, xmax = conf.high, y = Label_2, color = Category, shape = Category, fill = Category)) +
    geom_vline(aes(xintercept=0), color="white") +
    geom_vline(aes(xintercept = 0), lty = 2, color = "gray50", size = .25) +
    geom_errorbarh(height = 0, position = position_dodgev(height=-.5), size = .25) +
    geom_errorbarh(aes(xmin = conf.low+std.error*.3, xmax = conf.high-std.error*.3),
                   height = 0, position = position_dodgev(height=-.5), size = .75) +
    geom_point(position = position_dodgev(height=-.5), size = 2) +
    #facet_grid(Category ~ ., switch = "y", scales = "free") +
    theme_allPlots(10) +
    theme(strip.placement = "outside",
          strip.background = element_blank(),
          strip.text.y.left = element_text(margin = margin(0,6,0,0)),
          axis.title.y = element_blank(),
          axis.title.x = element_text(margin = margin(5,0,-12,0)),
          axis.title = element_text(size = 8),
          axis.ticks.y = element_blank(),
          axis.text.y = element_text(hjust = 0),
          plot.title = element_text(margin = margin(0,0,-12,0)),
          #plot.margin = margin(0,0,-8,0),
          panel.grid.major.y = element_blank(),
          legend.title = element_blank(),#element_text(size = 8),
          legend.position = "none",
          legend.background = element_blank(),
          legend.key = element_blank(),
          legend.key.width = unit(1, "cm")) +
    scale_color_manual(values = c("gray5", "gray50")) +
    scale_fill_manual(values = c("gray5", "gray50")) +
    scale_x_continuous(breaks = (-20:20)/20, labels = function(x) gsub("0\\.", ".", x) %>% gsub("^\\.0", "0", .)) +
    coord_cartesian(xlim = c(-1*x_limit, x_limit)) +
    scale_shape_manual(values = c(21, 22)) +
    #coord_cartesian(xlim = c(-.2, .2)) +
    labs(x = "ATE estimate", y = "Dependent variable") +
    
    geom_text(
      aes(y = "Any visit\n(binary)", x = -.95*x_limit, label = Category),
      size = 3, position = coefplot::position_dodgev(height = -.5),
      hjust = 0,
      legend.include = FALSE
    )
}

#  Create and export plot
g = tab_plot_web %>%
  filter(Category != "All other H/A sites",
         subset == "Full sample",
         Model == "Covariate-adjusted") %>%
  makePlot_web_main(); g
  
ggsave("current_draft/main_figures/fig_web_maintext.pdf",g , "pdf", width = 5, height = 1.75)

#       Corresponding appendix tables (Appendix 3.1-3.4, Tables SI-28 to 34) --------------

#  Function to create tables
makeTab_web = function(theRegs, theCaption, theLabel){
  out = texreg(theRegs,
               booktabs = T,
               only.contents = T,
               include.rsq = F,
               include.ci = F,
               include.rmse =F,
               digits = 3,
               use.packages = F,
               fontsize = "footnotesize",
               stars = c(.10, .02),  # for one-sided p-values
               custom.coef.map = list(`(Intercept)`="Constant", `z`="Treatment"),
               custom.header = list(`All H/A sites`=1:3, `Antisemitic sites`=4:6, `Other H/A sites`=7:9),
               custom.model.names = rep(c("Any visit", "Total visits", "Time spent"), 3),
               caption.above = T,
               caption = theCaption,
               label = theLabel, float.pos = "!h"
  )
  
  out = strsplit(out, "\\n")[[1]]
  
  #  remove stars from constant row
  out = ifelse(grepl("Constant", out), gsub("\\*", "", out), out)
  
  #  display one-sided significance levels
  out = gsub("<0.02", "<0.01", out)
  out = gsub("<0.1", "<0.05", out)
  
  #  add space between rows of coefs
  out = gsub("Treatment", "\\\\\\\\[-1.5ex] Treatment", out)

  out = gsub("begin.center.", "centering", out)
  out = gsub(".end.center.", "", out)
  
  #out = gsub("bottomrule", "bottomrule\\\\\\\\[-4ex]", out)
  
  #out = gsub("figure", "subfigure", out)
  
  out
}

#  Effects on full sample (Table SI-28)
makeTab_web(
  regs_web,
  "Effect on browsing behavior, full sample.",
  "tab:web_main"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_main.txt"
  )

#  Effects on those with pre-treatment HA visits (Table SI-29)
makeTab_web(
  regs_web_hirisk,
  "Effect on browsing behavior, visited H/A site pre-treatment.",
  "tab:web_atrisk"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_atrisk.txt"
  )

#  Weighted estimates (Table SI-30)
makeTab_web(
  regs_web_w,
  "Effect on browsing behavior, weighted for differential attrition.",
  "tab:web_weighted"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_weighted.txt"
  )

#  Unadjusted estimates, full sample (Table SI-31)
makeTab_web(
  regs_web_bivar,
  "Effect on browsing behavior, no covariate adjustment, full sample.",
  "tab:web_main_bivar"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_main_bivar.txt"
  )

#  Unadjusted estimate, pre-treatment HA visit (Table SI-32)
makeTab_web(
  regs_web_hirisk_bivar,
  "Effect on browsing behavior, no covariate adjustment, visited H/A site pre-treatment.",
  "tab:web_atrisk_bivar"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_atrisk_bivar.txt"
  )

#  Full-sample estimates dropping an outlier (Table SI-33)
makeTab_web(
  regs_web_outlier,
  "Effect on browsing behavior, full sample with outlier excluded.",
  "tab:web_outlier"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_outlier.txt"
  )

#  Equivalent for pre-treamtent HA visits (Table SI-34)
makeTab_web(
  regs_web_hirisk_outlier,
  "Effect on browsing behavior, visited H/A site pre-treatment with outlier excluded.",
  "tab:web_atrisk_outlier"
) %>%
  write(
    "current_draft/appendix_figures/tab_web_atrisk_outlier.txt"
  )

#    Time trend results (Appendix 3.5) ----------------------
#       Daily (Figure SI-4) -----------

tab_plot_web_daily = 
  bind_rows(
    map_df(daily_web, function(x)x, .id="variable") %>% mutate(Model="Covariate\nadjusted"),
    map_df(daily_web_bivar, function(x)x, .id="variable") %>% mutate(Model="Bivariate")
  ) %>% 
  group_by() %>%
  filter(term=="z") %>%
  mutate(
    var = gsub("_antisem|_total", "", variable),
    Variable = recode_factor(
      var,
      any_HA = "Any visit (binary)",
      visits_HA_log = "Number of visits (logged)",
      time_HA_log = "Time spent (logged)"
    ),
    Category = case_when(
      grepl("antisem", variable) ~ "Antisemitic",
      grepl("total", variable) ~ "All H/A sites"
    ),
    Model = factor(Model, unique(Model)),
    Label = recode_factor(
      variable,
      any_HA_total = "Visited any H/A\nsite (binary)",
      visits_HA_total_log = "Number of visits\nto H/A sites (log)",
      time_HA_total_log = "Time spent on\nH/A  sites (log)",
      any_HA_antisem = "Visited an anti-\nsemitic site\n(binary)",
      visits_HA_antisem_log = "Number of visits\nto antisemitic\nsites (log)",
      time_HA_antisem_log = "Time spent on\nantisemitic\nsites (log)",
      any_HA_other = "Visited other H/A\nsites (binary)",
      visits_HA_other_log = "Number of visits\nto other sites (log)",
      time_HA_other_log = "Time spent on\nother sites (log)"
    )
  )

dodgewidth = .5

g = tab_plot_web_daily %>%
  mutate(
    Model = gsub_factor("Covariate.", "Covariate-", Model)
  ) %>%
  filter(grepl("antisem", variable)) %>%
  ggplot(
    aes(x = days_since, y = estimate, ymin = conf.low, ymax = conf.high, color = Model, shape = Model, fill = Model)
  ) +
  geom_vline(aes(xintercept = -0.5), color = "white") +
  geom_vline(aes(xintercept = -0.5), lty=2, size=.25, color="gray20") +
  geom_hline(aes(yintercept = 0), color = "white") +
  geom_hline(aes(yintercept = 0), lty=2, size=.25, color="gray20") +
  geom_point(position = position_dodge(width = dodgewidth), fill="white") +
  geom_errorbar(position = position_dodge(width = dodgewidth),
                width = 0, size = .25) +
  geom_path(position = position_dodge(width=dodgewidth)) +
  scale_color_manual(values = c("gray5", "gray50")) +
  scale_shape_manual(values = c(16, 21)) +
  theme_allPlots(9,2) +
  theme(panel.grid.minor.x = element_blank(),
        legend.position = "bottom",
        axis.title.x = element_text(margin = margin(6,0,-8,0)),
        axis.title.y = element_text(margin = margin(0,0,0,0))) +
  facet_grid(~ Variable, scales = "free") +
  scale_x_continuous(breaks = -1:13, labels = c("Pre", 0:13)) +
  coord_cartesian(ylim = c(-.04, .04)) +
  labs(x = "Days since treatment", y = "Estimate")

ggsave("current_draft/appendix_figures/fig_web_daily.pdf", g, "pdf", width = 6.5, height = 3.5)

#       Weekly (Figure SI-5) -----------

tab_plot_web_weekly = 
  bind_rows(
    map_df(weekly_web, function(x)x, .id="variable") %>% mutate(Model="Covariate\nadjusted"),
    map_df(weekly_web_bivar, function(x)x, .id="variable") %>% mutate(Model="Bivariate")
  ) %>% 
  group_by() %>%
  filter(term=="z") %>%
  mutate(
    var = gsub("_antisem|_total", "", variable),
    Variable = recode_factor(
      var,
      any_HA = "Any visit (binary)",
      visits_HA_log = "Number of visits (logged)",
      time_HA_log = "Time spent (logged)"
    ),
    Category = case_when(
      grepl("antisem", variable) ~ "Antisemitic",
      grepl("total", variable) ~ "All H/A sites"
    ),
    Model = factor(Model, unique(Model)),
    Label = recode_factor(
      variable,
      any_HA_total = "Visited any H/A\nsite (binary)",
      visits_HA_total_log = "Number of visits\nto H/A sites (log)",
      time_HA_total_log = "Time spent on\nH/A  sites (log)",
      any_HA_antisem = "Visited an anti-\nsemitic site\n(binary)",
      visits_HA_antisem_log = "Number of visits\nto antisemitic\nsites (log)",
      time_HA_antisem_log = "Time spent on\nantisemitic\nsites (log)"
    )
  )

dodgewidth = .25

g = tab_plot_web_weekly %>%
  mutate(
    Model = gsub_factor("Covariate.", "Covariate-", Model)
  ) %>%
  filter(grepl("antisem", variable)) %>%
  ggplot(
    aes(x = weeks_since, y = estimate, ymin = conf.low, ymax = conf.high, color = Model, shape = Model)
  ) +
  geom_vline(aes(xintercept = 0.5), color = "white") +
  geom_vline(aes(xintercept = 0.5), lty=2, size=.25, color="gray20") +
  geom_hline(aes(yintercept = 0), color = "white") +
  geom_hline(aes(yintercept = 0), lty=2, size=.25, color="gray20") +
  geom_point(position = position_dodge(width = dodgewidth), fill="white") +
  geom_errorbar(position = position_dodge(width = dodgewidth),
                width = 0, size = .25) +
  geom_path(position = position_dodge(width=dodgewidth)) +
  scale_color_manual(values = c("gray5", "gray50")) +
  scale_shape_manual(values = c(16, 21)) +
  theme_allPlots(9,1) +
  theme(panel.grid.minor.x = element_blank(),
        legend.position = "bottom",
        axis.title.x = element_text(margin = margin(6,0,-8,0)),
        axis.title.y = element_text(margin = margin(0,0,0,0))) +
  facet_grid(~ Variable, scales = "free") +
  scale_x_continuous(breaks = -1:13, labels = c("Pre", 0:13)) +
  coord_cartesian(ylim = c(-.04, .04)) +
  labs(x = "Days since treatment", y = "Estimate") +
  scale_x_continuous(labels = c("Pre-\ntreat", "First\nweek", "Second\nweek", "Third\nweek"), breaks = 0:3, expand = c(.1,.1))

ggsave("current_draft/appendix_figures/fig_web_weekly.pdf", g, "pdf", width = 6.5, height = 3.5)

#    Hetero effects (Appendix 3.6, Tables SI-35 to 58) ----------------------

#  DEPENDS ON "controls_web" and "outcome_vars_web" OBJECTS CREATED ABOVE (line 544, "Calculate effects")

#  Function to make the regression tables
make_regTab = function(x, site_group, omit_controls, hetero_label){
  out = texreg(x,
         custom.gof.rows = list(Controls = rep(c("No", "Yes"), 3)),
         custom.coef.names = c("Constant", "Treatment", hetero_label, paste("Treatment TIMES", hetero_label)),
         omit.coef = omit_controls,
         booktabs = T,
         #only.contents = T,
         include.rsq = F,
         include.ci = F,
         include.rmse =F,
         digits = 3,
         use.packages = F,
         custom.header = list(`Any visit`=1:2, `Num. of visits`=3:4, `Time spent`=5:6),
         caption.above = T,
         caption = paste0("Heterogeneous effects on visits to ", site_group, ", by ", tolower(substr(hetero_label,1,1)), substr(hetero_label,2,nchar(hetero_label)), "." )) %>%
    #gsub(".+Constant", "Constant", .) %>%
    #gsub(".multicolumn.+", "", .) %>%
    gsub("TIMES", "$\\\\times$", .) %>%
    gsub(".begin.center.", "\\\\centering", .) %>%
    gsub(".end.center.", "", .) %>%
    gsub(".begin.table.", "", .) %>%
    gsub(".end.table.", "", .) #%>%
   # gsub("bottomrule", "bottomrule \\\\\\\\[-4ex]", .)
  
  out = strsplit(out, "\n")[[1]]
  
  gsub("& Model.+|.cmidrule.+", "", out)
}

#  Function to calculate all estimates and create tables
run_hetero_effects_web = function(hetero_variable, hetero_label){
  
  regs = list()
  regs_bivar = list()
  
  for(i in 1:length(outcome_vars_web)){
    
    these_controls = controls_web[[i]][which(!grepl(hetero_variable, controls_web[[i]]))]
    
    formula_bivar = paste(outcome_vars_web[i], " ~ z * ", hetero_variable)
    formula_adjusted = as.formula(paste(formula_bivar, "+", paste(these_controls, collapse = "+")))
    formula_bivar = as.formula(formula_bivar)
    
    regs[[i]] = lm_robust(formula_adjusted, data = d_prepost_complete)
    regs_bivar[[i]] = lm_robust(formula_bivar, data = d_prepost_complete)
  }
  
  names(regs) = names(regs_bivar) = outcome_vars_web
  
  omit_controls = names(d_prepost_complete)[which(names(d_prepost_complete) != hetero_variable & !grepl("weight|^z$|caseid|starttime", names(d_prepost_complete)))] %>% paste0(collapse="$|^")
  
  reg_tab_total = make_regTab(
    list(
      regs_bivar[[1]], regs[[1]], regs_bivar[[2]], regs[[2]], regs_bivar[[3]], regs[[3]]
    ), "all H/A sites", omit_controls, hetero_label)
  reg_tab_antisem = make_regTab(
    list(
      regs_bivar[[1+3]], regs[[1+3]], regs_bivar[[2+3]], regs[[2+3]], regs_bivar[[3+3]], regs[[3+3]]
    ), "antisemitic sites", omit_controls, hetero_label)
  reg_tab_other = make_regTab(
    list(
      regs_bivar[[1+6]], regs[[1+6]], regs_bivar[[2+6]], regs[[2+6]], regs_bivar[[3+6]], regs[[3+6]]
    ), "other H/A sites", omit_controls, hetero_label)
  
  list(adjusted = regs, bivariate = regs_bivar, reg_tab_total = reg_tab_total, reg_tab_antisem = reg_tab_antisem, reg_tab_other = reg_tab_other)  
}

#  Estimate all hetero effects
regs_hetero_web_CRT     = run_hetero_effects_web("crt_total",     "Cognitive reflection")
regs_hetero_web_empathy = run_hetero_effects_web("empathy_index", "Perspective-taking")
regs_hetero_web_conspir = run_hetero_effects_web("conspir_index", "Conspiratorial beliefs")
regs_hetero_web_knowl   = run_hetero_effects_web("knowl_total",   "Political knowledge")
regs_hetero_web_contact = run_hetero_effects_web("contact_jew",   "Contact with Jews")
regs_hetero_web_anyHA   = run_hetero_effects_web("any_HA_total_pre",   "Visited a H/A site pre-treatment")
regs_hetero_web_mobile  = run_hetero_effects_web("internet_more_mobile",   "Favors mobile web browser")
regs_hetero_web_pid7    = run_hetero_effects_web("pid7",   "Party ID (7-point)")

#  Export tables SI-35, 38, 41 ... 56: all HA sites
write(regs_hetero_web_CRT$reg_tab_total,     "current_draft/appendix_figures/tab_hetero_web_CRT_total.txt")
write(regs_hetero_web_empathy$reg_tab_total, "current_draft/appendix_figures/tab_hetero_web_empathy_total.txt")
write(regs_hetero_web_conspir$reg_tab_total, "current_draft/appendix_figures/tab_hetero_web_conspir_total.txt")
write(regs_hetero_web_knowl$reg_tab_total,   "current_draft/appendix_figures/tab_hetero_web_knowl_total.txt")
write(regs_hetero_web_contact$reg_tab_total, "current_draft/appendix_figures/tab_hetero_web_contact_total.txt")
write(regs_hetero_web_anyHA$reg_tab_total,   "current_draft/appendix_figures/tab_hetero_web_anyHA_total.txt")
write(regs_hetero_web_mobile$reg_tab_total,  "current_draft/appendix_figures/tab_hetero_web_mobile_total.txt")
write(regs_hetero_web_pid7$reg_tab_total,    "current_draft/appendix_figures/tab_hetero_web_pid7_total.txt")

#  Export tables SI-36, 39, 42 ... 57: antisem sites
write(regs_hetero_web_CRT$reg_tab_antisem,     "current_draft/appendix_figures/tab_hetero_web_CRT_antisem.txt")
write(regs_hetero_web_empathy$reg_tab_antisem, "current_draft/appendix_figures/tab_hetero_web_empathy_antisem.txt")
write(regs_hetero_web_conspir$reg_tab_antisem, "current_draft/appendix_figures/tab_hetero_web_conspir_antisem.txt")
write(regs_hetero_web_knowl$reg_tab_antisem,   "current_draft/appendix_figures/tab_hetero_web_knowl_antisem.txt")
write(regs_hetero_web_contact$reg_tab_antisem, "current_draft/appendix_figures/tab_hetero_web_contact_antisem.txt")
write(regs_hetero_web_anyHA$reg_tab_antisem,   "current_draft/appendix_figures/tab_hetero_web_anyHA_antisem.txt")
write(regs_hetero_web_mobile$reg_tab_antisem,  "current_draft/appendix_figures/tab_hetero_web_mobile_antisem.txt")
write(regs_hetero_web_pid7$reg_tab_antisem,    "current_draft/appendix_figures/tab_hetero_web_pid7_antisem.txt")

#  Export tables SI-37, 40, 43 ... 58: other HA sites
write(regs_hetero_web_CRT$reg_tab_other,     "current_draft/appendix_figures/tab_hetero_web_CRT_other.txt")
write(regs_hetero_web_empathy$reg_tab_other, "current_draft/appendix_figures/tab_hetero_web_empathy_other.txt")
write(regs_hetero_web_conspir$reg_tab_other, "current_draft/appendix_figures/tab_hetero_web_conspir_other.txt")
write(regs_hetero_web_knowl$reg_tab_other,   "current_draft/appendix_figures/tab_hetero_web_knowl_other.txt")
write(regs_hetero_web_contact$reg_tab_other, "current_draft/appendix_figures/tab_hetero_web_contact_other.txt")
write(regs_hetero_web_anyHA$reg_tab_other,   "current_draft/appendix_figures/tab_hetero_web_anyHA_other.txt")
write(regs_hetero_web_mobile$reg_tab_other,  "current_draft/appendix_figures/tab_hetero_web_mobile_other.txt")
write(regs_hetero_web_pid7$reg_tab_other,    "current_draft/appendix_figures/tab_hetero_web_pid7_other.txt")

#    Relationship b/w browsing and attitudes (Appendix 3.7, Table SI-59) --------------

d_survey %>% 
  select(variable, hypothesis_label, value, any_HA_antisem_pre:time_HA_total_log_pre) %>%
  select(variable, hypothesis_label, value, contains("antisem")) %>%
  select(-contains("_w2")) %>%
  filter(!grepl("sympathetic|w2", variable),
         grepl("policy_index|stereo_index|therm_jew|discrim_jew", variable)) %>%
  select(-variable) %>%
  rename(variable = hypothesis_label) %>%
  pivot_longer(
    -c(variable, value),
    names_to = "predictor_variable",
    values_to = "predictor_value"
  ) %>%
  filter(!is.na(predictor_value)) %>%
  group_by(
    variable, predictor_variable
  ) %>%
  mutate(
    predictor_value = (predictor_value - min(predictor_value)) / (max(predictor_value)-min(predictor_value)),
    predictor_variable = str_to_sentence(gsub("_.+", "", predictor_variable))
    
  ) %>%
  summarize(
    tidy(lm_robust(value ~ predictor_value)) %>% filter(term == "predictor_value")
  )  %>%
  select(Attitude = variable, Browsing = predictor_variable, Difference = estimate, SE = std.error, p = p.value) %>%
  mutate(Attitude = ifelse(Browsing == "Any", Attitude, "")) %>%
  xtable(
    caption="Relationship between pre-treatment browsing behavior and attitudinal DVs",
    label="tab:browsing_attitudes",
    digits=3
  ) %>%
  print(
    include.rownames=F,
    booktabs=T,
    caption.placement="top"
  ) %>%
  gsub("Feeling", "\\\\\\\\[-1.5ex] Feeling", .) %>%
  gsub("Policy", "\\\\\\\\[-1.5ex] Policy", .) %>%
  gsub("Stereo", "\\\\\\\\[-1.5ex] Stereo", .) %>%
  write(
    "current_draft/appendix_figures/tab_browsing_attitudes.txt"
  )

log_close()
